In [ ]:
import requests
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as px
import plotly.express as pxe
import plotly.io as pio
pio.renderers.default = 'notebook' #for PLOTY rendering in VSCODE, disable acording to env needs

import os
import glob

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time as ts

import datetime as dt
from datetime import *

from sklearn.linear_model import LinearRegression 
from sklearn.model_selection import train_test_split

from sklearn import preprocessing
from sklearn import utils

import warnings
warnings.filterwarnings('ignore')

TIME CONVERSION FUNCTION¶

For our crawling method we had to format the time signature accorind to our relevent need.
Therefore we created the function "convert24" which take an AM/PM string and converts it to 24hr format

In [ ]:
def convert24(timeStr):
    conv = timeStr
    conv = datetime.strptime(conv, '%I:%M %p')
    return "{:02d}:{:02d}".format(conv.hour, conv.minute)

PHASEMOON CRAWLING USING SELENIUM¶

We decided to use "Selenium webdriver" as our crawling method as it allowed us more control.
The first site "phaseMoon", was a tough one to handle because it contained no "classes" or "id" for any of it's properties.
To solve the issues we faced, we started to dive into the site HTML code and realized that he had a constent stacture of and the data it self was the only dynamic part.
Our solution was to create multiple arrays in side of one another, with each index containing a diffrent row of the data table.
By doing so we managed to accese each element in the data table and extract the data we needed

In [ ]:
# Set Initial Dates 
current_date = dt.date.today()
starting_date = dt.date(2008,1,1)
delta = dt.timedelta(days=1)

# Set Webdriver Patch and Driver it self
# Chnage webDriver directory according to your desktop
serv = Service("/Users/oranmor/Documents/chromedriver")
driver = webdriver.Chrome(service=serv)

with open('Data Frames/data_table.csv', 'w', newline='') as f:
    # fieldName = ['Date', 'Illumination', 'Moon rise', 'Moon set', 'Length','Moon distance', 'Moon angle', 'Phase', 'Next full moon', 'Cycle age', 'Events', 'Site']
    thewriter = csv.DictWriter(f, fieldnames=fieldName)

    thewriter.writeheader()
    while starting_date <= current_date:
      url = 'https://phasesmoon.com/moonday{day}{month}{year}.html'.format(day = starting_date.day, month = starting_date.strftime("%B"), year = starting_date.year) 
      driver.get(url)

      # Declare variable to write
      Phase = ''
      Illumination = ''
      Moon_rise = ''
      Moon_set = ''
      Cycle_age = ''
      Moon_angle = ''
      moon_distance = ''

      # Handaling Timeout events + getting/Users/oranmor/Documents/chromedriver values from site
      try:
        table = WebDriverWait(driver, 10).until( 
            EC.presence_of_element_located((By.TAG_NAME, "table"))
        )
        moonInfo = table.find_element(By.TAG_NAME,"tbody")
        moonInfo = moonInfo.find_elements(By.TAG_NAME,"tr")

        for index, tab in enumerate(moonInfo):
            currData = tab.find_elements(By.TAG_NAME,"td")
            data = currData[1]
            if (index == 0):
              Phase = data.text
            if (index == 1):
               Illumination  = data.text.replace('% Visible', '')
            if (index == 2):
                  Moon_rise = convert24(data.text[0:8])
                  Moon_set = convert24(data.text[11:])
            if (index == 3):
                  Cycle_age =  data.text.replace(' Days', '')
            if (index == 4):
                   Moon_angle = data.text

            if (index == 5):
                  moon_distance = data.text.replace('km', '')

      except:
        driver.quit()

      # Adding data from the site to the CSV file itself
      new_dt = starting_date.strftime("%d/%m/%Y")
      thewriter.writerow({
        'Date': new_dt,
        'Illumination': Illumination, 
        'Moon rise': Moon_rise, 
        'Moon set': Moon_set, 
        'Length': '*',
        'Moon distance': moon_distance,
        'Moon angle': Moon_angle, 
        'Phase': Phase,
        'Next full moon': '*',
        'Cycle age': Cycle_age,
        'Events': '*',
        'Site': 'phasesmoon'
      })
      starting_date += delta
      ts.sleep(0.5)
    driver.quit()

MOONGIANT CRAWLING USING SELENIUM¶

After exploring the web, we found out there are a lot of sites that gave us diffrent data.
To handle the situation we decided to use the "Crawling" method on another site, "MoonGiant"
To this site we used the same crawling method as before with "Selenium Webdrive"

In [ ]:
#return only the numbers in a string
def string_to_only_nums(string): 
    emp_str = ""
    for m in string:
       if m.isdigit():
        emp_str = emp_str + m
    return emp_str

#start google chrome
ser = Service("C:\Program Files (x86)\chromedriver.exe")
driver = webdriver.Chrome(service=ser)
counter = 0
#dates
current_date = dt.date.today() 
starting_date = dt.date(2008,1,1)
delta = dt.timedelta(days=1)

#creates csv file
with open('Data Frames/data_table2.csv', 'a', newline='') as f:
    fieldName = ['Date', 'Illumination', 'Moon rise', 'Moon set', 'Length','Moon distance', 'Moon angle', 'Phase','Next full moon', 'Cycle age', 'Events', 'Site']
    thewriter = csv.DictWriter(f, fieldnames=fieldName)
    thewriter.writeheader()

# while loop from starting date to current date
while starting_date <= current_date:

  #changing the format of the date so it will fit the web sight
  new_dt = starting_date.strftime("%m/%d/%Y")
  url = "https://www.moongiant.com/phase/{}".format(new_dt)
  driver.get(url)
  counter+=1
  try:
    #devide the un needed text from the data
    sight_data = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "moonDetails")))
    sight_data = sight_data.find_elements_by_tag_name("span") 
    # sight_data = driver.find_element_by_id("moonDetails")
    
  
    #Write to the csv file
    with open('data_table2.csv', 'a', newline='') as f:
      thewriter = csv.DictWriter(f, fieldnames=fieldName)
      thewriter.writerow({
        'Date': starting_date.strftime("%d/%m/%Y"),
        'Illumination': string_to_only_nums(sight_data[1].text),
        'Moon rise': '*', 
        'Moon set':'*', 
        'Length': '*',
        'Moon distance':sight_data[4].text,
        'Moon angle': sight_data[3].text, 
        'Phase': sight_data[0].text,
        'Next full moon': '*',
        'Cycle age': sight_data[2].text,
        'Events':  '*',
      })
    # time.sleep(1)
  
  except:
    driver.quit()
  #incremanting the day by one
  starting_date += delta 

#exit web and file
f.close()
driver.quit()

OBSERVING OUR DATAFRAMES¶

After creating our dataframes we took a look to ses the initial data we collected.
By using some built-in functions in "Pandas" library we saw that we have extracted approximately 42,000 cells of data from "PhaseMoon" alone and that none of them is empty (2008 until june of 2022).
Because of that we can tell that no initial missing data cleanup is required

READING MOONPHASE DATEFRAME¶
In [ ]:
dfP = pd.read_csv('Data Frames/data_table.csv')
dfP.shape
Out[ ]:
(5289, 12)
In [ ]:
dfP.isnull().sum()
Out[ ]:
Date              0
Illumination      0
Moon rise         0
Moon set          0
Length            0
Moon distance     0
Moon angle        0
Phase             0
Next full moon    0
Cycle age         0
Events            0
Site              0
dtype: int64
In [ ]:
dfP.head()
Out[ ]:
Date Illumination Moon rise Moon set Length Moon distance Moon angle Phase Next full moon Cycle age Events Site
0 01/01/2008 44.84 0:25 12:02 * 401,804.18 0.50 Waning crescent * 22.63 * phasesmoon
1 02/01/2008 35.54 1:18 12:34 * 403,984.69 0.49 Waning crescent * 23.53 * phasesmoon
2 03/01/2008 26.82 2:10 13:10 * 405,219.35 0.49 Waning crescent * 24.41 * phasesmoon
3 04/01/2008 18.92 3:05 13:49 * 405,476.90 0.49 Waning crescent * 25.30 * phasesmoon
4 05/01/2008 12.09 4:00 14:33 * 404,759.39 0.49 Waning crescent * 26.19 * phasesmoon
In [ ]:
dfP.tail()
Out[ ]:
Date Illumination Moon rise Moon set Length Moon distance Moon angle Phase Next full moon Cycle age Events Site
5284 20/06/2022 63.63 0:00 11:10 * 374,432.28 0.53 Waning gibbous * 20.85 * phasesmoon
5285 21/06/2022 52.88 0:01 12:07 * 379,030.58 0.53 Last quarter * 21.88 * phasesmoon
5286 22/06/2022 42.34 0:34 13:02 * 383,852.70 0.52 Waning crescent * 22.87 * phasesmoon
5287 23/06/2022 32.41 1:06 13:56 * 388,629.06 0.51 Waning crescent * 23.84 * phasesmoon
5288 24/06/2022 23.43 1:38 14:50 * 393,115.36 0.51 Waning crescent * 24.78 * phasesmoon
READING MOONGIANT DATEFRAME¶
In [ ]:
dfG = pd.read_csv('Data frames/data_table2.csv')
dfG.shape
Out[ ]:
(5289, 12)
In [ ]:
dfG.isnull().sum()
Out[ ]:
Date              0
Illumination      0
Moon rise         0
Moon set          0
Length            0
Moon distance     0
Moon angle        0
Phase             7
Next full moon    0
Cycle age         0
Events            0
Site              0
dtype: int64
In [ ]:
dfG.head()
Out[ ]:
Date Illumination Moon rise Moon set Length Moon distance Moon angle Phase Next full moon Cycle age Events Site
0 01/01/2008 39 * * * 403,275.00 0.49 Waning Crescent * 23.19 * MoonGiant
1 02/01/2008 30 * * * 404,869.95 0.49 Waning Crescent * 24.08 * MoonGiant
2 03/01/2008 22 * * * 405,495.38 0.49 Waning Crescent * 24.97 * MoonGiant
3 04/01/2008 15 * * * 405,141.31 0.49 Waning Crescent * 25.86 * MoonGiant
4 05/01/2008 8 * * * 403,829.10 0.49 Waning Crescent * 26.75 * MoonGiant
In [ ]:
dfG.tail()
Out[ ]:
Date Illumination Moon rise Moon set Length Moon distance Moon angle Phase Next full moon Cycle age Events Site
5284 20/06/2022 57 * * * 377,265.07 0.53 Waning Gibbous * 21.50 * MoonGiant
5285 21/06/2022 46 * * * 382,035.67 0.52 Last Quarter * 22.50 * MoonGiant
5286 22/06/2022 36 * * * 386,859.52 0.51 Waning Crescent * 23.48 * MoonGiant
5287 23/06/2022 27 * * * 391,481.02 0.51 Waning Crescent * 24.43 * MoonGiant
5288 24/06/2022 18 * * * 395,678.44 0.50 Waning Crescent * 25.36 * MoonGiant

PRE_PROCESSING¶

MERGING OUR TWO CSV DATAFRAMES TO A SINGLE FILE¶

After getting both of our dataframes we needed to merge them.
For the merge, we decided to make an average out of the two DataFrames values and "inject" the data into a new "CSV" file
Every data that the "Moongiant" DataFrame was missing or created a conflict (like with "Moon phase" collum) we defined the "PhaseMoon" DataFrame to be the "master"

In [ ]:
#merge 2 csv
joined_files = os.path.join("Data Frames","*.csv")
joined_list = glob.glob(joined_files)
merged_df = pd.concat(map(pd.read_csv, joined_list), ignore_index=True)

#outPut new mergerd csv
merged_df.to_csv("Merged.csv", index=False)
FUNCTIONS AND PARAMETERS¶
In [ ]:
row_names= ["Date","Illumination","Moon rise","Moon set","Length","Moon distance","Moon angle","Phase","Next full moon","Cycle age","Events","Site"]
tab2 = len(merged_df)/2
#returns time diff
def getLength(st,rs):
  if(len(st) == 4):
    st = "0"+st
  if(len(rs) == 4):
    rs = "0"+rs
  
  dt_rise = float(f"{(rs)[0:2]}.{(rs)[3:5]}")
  dt_set = float(f"{(st)[0:2]}.{(st)[3:5]}")
  return round((24 - dt_set + dt_rise if dt_set > dt_rise else dt_rise - dt_set), 2)

#returns average
def getAverage(num,num2):
  return round((float(num)+float(num2))/2, 3)

#returns Cycle average
def getICycleAverage(num,num2):
  if(abs(num2 - num) > 20):
    return num2
  else:
    return round((float(num)+float(num2))/2, 3)

#returns average with ","
def getAverageS(num,num2):
  return round((float(num.replace(",",""))+float(num2.replace(",","")))/2, 3)

#returns the date of the next full moon
# def findNextFullMoon(i): 
#   while i < tab2+1 : 
#     if(merged_df.loc[i,row_names[7]] == "Full Moon" or merged_df.loc[i,row_names[7]] == "Full moon"):
#       return merged_df.loc[i+tab2,row_names[0]]
#     i=i+1
#   return "None"
  
#returns the event in the date
def checkEvent(i):
  if (merged_df.loc[i,row_names[7]] == "Full moon" or merged_df.loc[i,row_names[7]] == "Last Quarter" or merged_df.loc[i,row_names[7]] == "First Quarter" or merged_df.loc[i,row_names[7]] == "New Moon"):
    return merged_df.loc[i,row_names[7]]
  else:
    return "None" 

PROCESSING¶

In [ ]:
#creates csv file
with open('processed.csv', 'w', newline='') as f:
    fieldName = ['Date', 'Illumination', 'Moon rise', 'Moon set', 'Length','Moon distance', 'Moon angle', 'Phase','Cycle age', 'Events']
    thewriter = csv.DictWriter(f, fieldnames=fieldName)
    thewriter.writeheader()
j = 1

for i in range(int(tab2)):
  with open('processed.csv', 'a', newline='') as f:
    thewriter = csv.DictWriter(f, fieldnames=fieldName)
    thewriter.writerow({
      'Date': merged_df.loc[i+tab2,row_names[0]],
      'Illumination': getAverage(merged_df.loc[i+tab2,row_names[1]],merged_df.loc[i,row_names[1]]),
      'Moon rise': merged_df.loc[i+tab2,row_names[2]], 
      'Moon set':merged_df.loc[i+tab2,row_names[3]], 
      'Length': getLength(merged_df.loc[i+tab2,row_names[3]],merged_df.loc[i+tab2,row_names[2]]),
      'Moon distance': getAverageS(merged_df.loc[i+tab2,row_names[5]],merged_df.loc[i,row_names[5]]),
      'Moon angle': getAverage(merged_df.loc[i+tab2,row_names[6]],merged_df.loc[i,row_names[6]]), 
      'Phase': merged_df.loc[i+tab2,row_names[7]],
      'Cycle age': getICycleAverage(merged_df.loc[i+tab2,row_names[9]],merged_df.loc[i,row_names[9]]),
      'Events': checkEvent(i+tab2),     
      })  
In [ ]:
df = pd.read_csv('processed.csv')
print(df.shape)
df.head()
(5289, 10)
Out[ ]:
Date Illumination Moon rise Moon set Length Moon distance Moon angle Phase Cycle age Events
0 01/01/2008 41.920 0:25 12:02 12.23 402539.590 0.495 Waning crescent 22.910 None
1 02/01/2008 32.770 1:18 12:34 12.84 404427.320 0.490 Waning crescent 23.805 None
2 03/01/2008 24.410 2:10 13:10 13.00 405357.365 0.490 Waning crescent 24.690 None
3 04/01/2008 16.960 3:05 13:49 13.56 405309.105 0.490 Waning crescent 25.580 None
4 05/01/2008 10.045 4:00 14:33 13.67 404294.245 0.490 Waning crescent 26.470 None
USING THE "DESCRIBE" METHOD TO VIEW OUR ENTIRE SET¶
In [ ]:
df.describe()
Out[ ]:
Illumination Length Moon distance Moon angle Cycle age
count 5289.000000 5289.000000 5289.000000 5289.000000 5289.000000
mean 50.030936 12.027173 384974.931738 0.518072 14.453082
std 35.094871 1.115005 14902.928513 0.020899 8.497870
min 0.000000 9.320000 363354.600000 0.490000 0.000000
25% 15.065000 11.110000 370062.510000 0.500000 7.095000
50% 49.940000 12.020000 385508.230000 0.515000 14.445000
75% 84.880000 12.930000 399876.365000 0.540000 21.830000
max 100.000000 14.670000 405458.665000 0.550000 29.235000

EOD PHASE¶


Considering that our research qeustion is based on the moon known behavior we immediately thought about comparing the data we had to the "Cyle age" collum as it represented an evolving and devolving momvent.

Here we compared the moon age in the cycle to the illumination level and saw how they effeted one another

In [ ]:
plot = px.Figure(data=[px.Scatter(
    x=df['Cycle age'],
    y=df['Illumination'],
    mode='markers',)
])
  
# Add dropdown
plot.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            direction="left",
            buttons=list([
                dict(
                    args=["type", "scatter"],
                    label="Scatter Plot",
                    method="restyle"
                ),
                dict(
                    args=["type", "bar"],
                    label="Bar Chart",
                    method="restyle"
                )
            ]),
        ),
    ]
)
plot.update_layout(autotypenumbers='convert types')
plot.show()

After realizing there's a solid connection between the two collums we wanted to add another "layer" to the graph and see if the moon angle will also play a role.
Watching the results you can clearly see that here the "moon angle" is to diverced and not affected by the age of the cycle.

Therefore seeing that the solid connection is only seen with "Cycle age" and "Illumination"¶

In [ ]:
plot = px.Figure(data=[px.Scatter(
    x=df['Cycle age'],
    y=df['Moon angle'],
    mode='markers',)
])
  
# Add dropdown
plot.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            direction="left",
            buttons=list([
                dict(
                    args=["type", "scatter"],
                    label="Scatter Plot",
                    method="restyle"
                ),
                dict(
                    args=["type", "bar"],
                    label="Bar Chart",
                    method="restyle"
                )
            ]),
        ),
    ]
)
plot.update_layout(autotypenumbers='convert types')
plot.show()
ALL THREE COMBINED¶
In [ ]:
fig = pxe.line_3d(df, x="Cycle age", y="Moon angle",z="Illumination", color="Cycle age")
fig.update_layout(autotypenumbers='convert types')
fig.show()

After seeing the last graphs we decided to compare "Moon angle" with "Moon distance" and see if they some sort of relation to one another

Afte viewing the graph we got we could clearly see a constant decending "line" that indicated a relation between the two collums data

In [ ]:
fig = pxe.scatter(df, x="Moon distance", y="Moon angle")
fig.update_layout(autotypenumbers='convert types')
fig.show()
VIEWING THE MOVEMENT OF "MOON DISTANCE" COLLUM THROUGH OUT THE DATAFRAME¶
In [ ]:
plot = px.Figure(data=[px.Scatter(
    y=df['Moon distance'],
    mode='lines',)
]) 
plot.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=1,
                    step="day",
                    stepmode="backward"),
            ])
        ),
        rangeslider=dict(
            visible=True
        ),
    )
)
plot.update_layout(autotypenumbers='convert types')
plot.show()
In [ ]:
for i in range (df.shape[0]):
    df.at[i,'Illumination'] = float(df.loc[i,'Illumination'])
    df.at[i,'Moon angle'] = float(df.loc[i,'Moon angle'])
    df.at[i,'Moon distance'] = float(df.loc[i,'Moon distance'])
    df.at[i,'Cycle age'] = float(df.loc[i,'Cycle age'])

df.head()
Out[ ]:
Date Illumination Moon rise Moon set Length Moon distance Moon angle Phase Cycle age Events
0 01/01/2008 41.920 0:25 12:02 12.23 402539.590 0.495 Waning crescent 22.910 None
1 02/01/2008 32.770 1:18 12:34 12.84 404427.320 0.490 Waning crescent 23.805 None
2 03/01/2008 24.410 2:10 13:10 13.00 405357.365 0.490 Waning crescent 24.690 None
3 04/01/2008 16.960 3:05 13:49 13.56 405309.105 0.490 Waning crescent 25.580 None
4 05/01/2008 10.045 4:00 14:33 13.67 404294.245 0.490 Waning crescent 26.470 None

MACHINE LEARNING¶

PREDICTING "MOON DISTANCE" USING "MOON ANGLE" WITH THE "LINEAR REGRESSION MODEL"¶
In [ ]:
X = np.array(df['Moon angle']).reshape(-1,1)
y =np.array(df['Moon distance']).reshape(-1,1)
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

model = LinearRegression()
model = model.fit(X_train,y_train)
acc = model.score(X_test, y_test)
print(f"the accuracy score of predicting 'Moon Distance' using 'Moon angle' is -> {acc}")
print(f"Moon distance when moon angle is at 0.47 -> {model.predict([[0.47]])[0][0]}")
the accuracy score of predicting 'Moon Distance' using 'Moon angle' is -> 0.9917999511059503
Moon distance when moon angle is at 0.47 -> 419100.0574299945
PREDICTING "ILLUMINATION" USING "CYCLE AGE" WITH THE "POLYNOMIAL MODEL"¶

ARRAY CALCULATION METHOD USING NUMPY AND POLYNOMIAL DEGREE¶

In [ ]:
from turtle import color
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

x = np.array(df.head(50)['Cycle age']). reshape(-1,1).ravel()
y = np.array(df.head(50)['Illumination']).reshape(-1,1).ravel()

x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2)

mymodel = np.poly1d(np.polyfit(x_train,y_train,4))

myline = np.linspace(1,22,100)
plt.figure(facecolor='#ffffff')
ax = plt.axes()
ax.set_facecolor("#afafaf")
plt.scatter(x_train,y_train)
plt.plot(myline,mymodel(myline), color='red')
plt.show()

print(f"the accuracy score of predicting 'Illumination' using 'Cycle age' is -> {r2_score(y_test,mymodel(x_test))}")
print(f"Illumination when cycle age is at 6.5 -> {mymodel(6.5)}")
the accuracy score of predicting 'Illumination' using 'Cycle age' is -> 0.9977996724258708
Illumination when cycle age is at 6.5 -> 41.65252306672311

"LINEAR REGRESSION MODEL METHOD USING POLYNOMIAL FEATURES¶

In [ ]:
from sklearn.preprocessing import PolynomialFeatures

Xa = np.array(df['Cycle age']).reshape(-1,1)
Ya = np.array(df['Illumination'])

Xa_train, Xa_test, Ya_train, Ya_test = train_test_split(Xa,Ya,test_size=0.2)

polli = PolynomialFeatures(degree=3)
xa_poly = polli.fit_transform(Xa_train)
polli.fit(Xa_train,Ya_train)

modall = LinearRegression()
modall.fit(xa_poly, Ya_train)

y_pred = modall.predict(polli.fit_transform(Xa_test))

print(f"our score is {r2_score(Ya_test,y_pred)}, but we can see that when we're reaching the edge of the cycle we're starting to get some weird values such as the next situation:")
print(f"when setting the cycle age to 28 the result we're getting is -> {modall.predict(polli.fit_transform([[28]]))[0]}, this is situation is not good because the prediction is invalid")
our score is 0.9226166818708854, but we can see that when we're reaching the edge of the cycle we're starting to get some weird values such as the next situation:
when setting the cycle age to 28 the result we're getting is -> -7.382200053681487, this is situation is not good because the prediction is invalid
In [ ]:
Xa = np.array(df['Cycle age']).reshape(-1,1)
Ya = np.array(df['Illumination'])

Xa_train, Xa_test, Ya_train, Ya_test = train_test_split(Xa,Ya,test_size=0.2)

polli = PolynomialFeatures(degree=4)
xa_poly = polli.fit_transform(Xa_train)
polli.fit(Xa_train,Ya_train)

modall = LinearRegression()
modall.fit(xa_poly, Ya_train)

y_pred = modall.predict(polli.fit_transform(Xa_test))

print(f"After raising our degree to 4 we can see that our score is now {r2_score(Ya_test,y_pred)}, at this point we can look what's happening in the same situation as before:")
print(f"when setting the cycle age to 28 the result we're getting is -> {modall.predict(polli.fit_transform([[28]]))[0]}, this is situation is now valid and the prediction is mathes the csv data we have")
After raising our degree to 4 we can see that our score is now 0.9993513969105874, at this point we can look what's happening in the same situation as before:
when setting the cycle age to 28 the result we're getting is -> 2.360911812053791, this is situation is now valid and the prediction is mathes the csv data we have

PREDICTING "ILLUMINATION" USING "CYCLE AGE", "MOON DISTANCE" AND "MOON ANGLE" WITH THE "RANDOM FOREST" METHOD¶

>Here we added the "Moon distance" and "Moon angle" to see how they'll affect the changes of "Illumination" field

In [ ]:
from sklearn.ensemble import RandomForestRegressor

X = df.head(100)[['Cycle age', 'Moon distance', 'Moon angle']]
y = df.head(100)['Illumination']

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

# Train Regression models
rF = RandomForestRegressor(random_state=1)
rF.fit(X_train, y_train)
rF_pred = rF.predict(X_test)
some = np.array(y_test).reshape(-1,1).ravel()

print(f"the accuracy score of predicting 'Illumination' using 'Cycle age' 'Moon distance' and 'Moon angle' is -> {r2_score(y_test,rF_pred)}")

plt.figure(facecolor='#ffffff')
ax = plt.axes()
ax.set_facecolor("#afafaf")
plt.xlabel("Sample amount")
plt.ylabel('Illumination precent')
plt.plot(some, linestyle='dashed', color='#000', linewidth=6.0)
plt.plot(rF_pred, linestyle='solid',color='#adefca', linewidth=3.0)

plt.show()
the accuracy score of predicting 'Illumination' using 'Cycle age' 'Moon distance' and 'Moon angle' is -> 0.9986941630965762

MOON CYCLE AGE CALCULATOR BASED ON "JULIAN DATE" AND "SUBSYSTEMS" ARTICLE FROM 2017¶


A -> year / 100 ~Int casting
B -> A / 4 ~Int casting
C -> 2 - A + B
E -> "number of days in year" * ("user year" + 4716) ~Int casting
F -> 30.6001 * ("user month" + 1) ~Int casting
JD -> C + "user day" + E + F - 1524.5
days since last new moon -> JD - 2451549.5
New Moon -> daySinceNew / 29.53{constant cycle days}
Moon Cycle -> (newMoon - Int(newMoon)) * 29.53{constant cycle days}¶

In [ ]:
# based on : https://www.subsystems.us/uploads/9/8/9/4/98948044/moonphase.pdf
# Note that this calculation only works from 2000/01/07 due to moon recalculated speed
userInput = input('Enter date (yyyy/mm/dd) : ')
format = '%Y/%m/%d'
dt_Input = datetime.strptime(userInput, format)
print(dt_Input.date())
A = int(dt_Input.year / 100)
B = int(A / 4)
C = 2- A + B
E = int(365.25 * (dt_Input.year + 4716))
F = int(30.6001 * (dt_Input.month + 1))
JD = C + dt_Input.day + E + F - 1524.5
daySinceNew = JD - 2451549.5
newMoons = daySinceNew / 29.53
moonCycle = (newMoons - int(newMoons)) * 29.53
print("Moon cycle age = ",moonCycle)
2024-07-07
Moon cycle age =  1.4099999999994601
In [ ]:
import plotly
plotly.offline.init_notebook_mode()